Stops¶
Downloads + Imports¶
# https://www.vbb.de/media/download/2029
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
resp = urlopen("https://www.vbb.de/media/download/2029")
%time zipfile = ZipFile(BytesIO(resp.read()))
zipfile.namelist()
CPU times: user 135 ms, sys: 84.1 ms, total: 219 ms
Wall time: 2.27 s
['agency.txt',
'calendar.txt',
'calendar_dates.txt',
'frequencies.txt',
'pathways.txt',
'routes.txt',
'shapes.txt',
'stop_times.txt',
'stops.txt',
'transfers.txt',
'trips.txt']
#import sys
#!{sys.executable} -m pip install datashader
import pandas as pd
import numpy as np
import mplleaflet
import folium
import holoviews as hv
import holoviews.operation.datashader as hd
# Set the default colormap of HoloViews' `shade` operation to a light-blue ->
# dark-blue ramp (presumably picked up by the `hd.datashade` calls further down).
hd.shade.cmap=["lightblue", "darkblue"]
# Load the HoloViews plotting backends; bokeh is listed first, matplotlib second.
hv.extension("bokeh", "matplotlib")
import datashader as ds
import datashader.transfer_functions as tf
Setup Plotting¶
# --- matplotlib / seaborn configuration ---
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
# Dark-grid seaborn theme with overridden axes background and grid colours.
sns.set_style(
style='darkgrid',
rc={'axes.facecolor': '.9', 'grid.color': '.8'}
)
# Use the 'deep' palette for plots and keep its colours in `sns_c` so that
# individual colours can be picked by index later on.
sns.set_palette(palette='deep')
sns_c = sns.color_palette(palette='deep')
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
from pandas.plotting import register_matplotlib_converters
# Register pandas' converters with matplotlib.
register_matplotlib_converters()
# Default figure size and resolution for every matplotlib figure created below.
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['figure.dpi'] = 100
# --- bokeh configuration ---
from bokeh.plotting import figure, output_notebook, show, reset_output
from bokeh.tile_providers import OSM, get_provider
# Send bokeh output to the notebook.
output_notebook()
Read and format data¶
# Parse stops.txt directly from the in-memory archive; %time reports the parse time.
%time stops_df = pd.read_csv(zipfile.open('stops.txt'))
# NOTE(review): a notebook only displays the value of a cell's last expression,
# so this tail() result is discarded (the recorded output shows only info()).
stops_df.tail()
# Print column dtypes and non-null counts.
# NOTE(review): the recorded info() output lists parent_station as float64, so
# these identifiers are handled as floats (e.g. 900000245025.0) while stop_id
# is an object column - confirm whether parent_station should be read as str.
stops_df.info()
CPU times: user 98.3 ms, sys: 8.18 ms, total: 106 ms
Wall time: 106 ms
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41733 entries, 0 to 41732
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 stop_id 41733 non-null object
1 stop_code 0 non-null float64
2 stop_name 41733 non-null object
3 stop_desc 0 non-null float64
4 stop_lat 41733 non-null float64
5 stop_lon 41733 non-null float64
6 location_type 41733 non-null int64
7 parent_station 28607 non-null float64
8 wheelchair_boarding 2944 non-null float64
9 platform_code 4419 non-null object
10 zone_id 15336 non-null object
dtypes: float64(6), int64(1), object(4)
memory usage: 3.5+ MB
# stop_code and stop_desc have no non-null values in this feed, so drop them.
stops_df = stops_df.drop(columns=['stop_code', 'stop_desc'])
# Missing wheelchair_boarding is recorded as 0. Filling the numeric column
# directly keeps it a clean integer column; a frame-wide fillna('') would first
# turn it into a mixed str/float object column that then has to be patched.
stops_df['wheelchair_boarding'] = stops_df['wheelchair_boarding'].fillna(0).astype('int64')
# All remaining gaps (parent_station, platform_code, zone_id) become empty strings.
stops_df = stops_df.fillna('')
# Keep a copy that still contains every stop record; it is used later to count
# how many stop entries share one station name.
stops_df_multiple_stops = stops_df.copy()
# Collapse records that agree on name, location type, accessibility and
# platform, keeping the first occurrence of each combination.
stops_df = stops_df.drop_duplicates(
    subset=['stop_name', 'location_type', 'wheelchair_boarding', 'platform_code'],
    keep='first',
)
stops_df.head()
| stop_id | stop_name | stop_lat | stop_lon | location_type | parent_station | wheelchair_boarding | platform_code | zone_id | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 000008012713 | Rangsdorf, Bahnhof | 52.294125 | 13.431112 | 0 | 900000245025.0 | 0 | ||
| 1 | 000008010205 | Leipzig, Hauptbahnhof | 51.344817 | 12.381321 | 0 | 900000550090.0 | 0 | ||
| 2 | 000008010327 | Senftenberg, Bahnhof | 51.526790 | 14.003977 | 0 | 900000435000.0 | 0 | ||
| 3 | 000008010324 | Schwerin, Hauptbahnhof | 53.635261 | 11.407520 | 0 | 900000550112.0 | 0 | ||
| 4 | 000008012393 | Mühlanger, Bahnhof | 51.855704 | 12.748198 | 0 | 900000550319.0 | 0 |
# Number of distinct values per column (missing values, if any, count as a value).
stops_df.nunique(dropna=False)
stop_id 28907
stop_name 13119
stop_lat 13069
stop_lon 13082
location_type 2
parent_station 13088
wheelchair_boarding 2
platform_code 59
zone_id 14586
dtype: int64
# Visualization with folium (disabled): much slower and more memory-hungry than bokeh
#f = folium.Figure(width=800, height=600)
#m = folium.Map(location=[45.5236, -122.6750], prefer_canvas=True).add_to(f)
#for lat, lon in zip(stops_df['stop_lat'], stops_df['stop_lon']):
# folium.CircleMarker(
# location=[lat, lon],
# radius=1,
# color="#3186cc",
# fill=True,
# fill_color="#3186cc",
# ).add_to(m)
#m
def merc_from_arrays(lats, lons):
    """Project latitude/longitude in degrees to spherical Web-Mercator metres.

    Args:
        lats: Latitudes in degrees (scalar, NumPy array, pandas Series or
            any other array-like).
        lons: Longitudes in degrees, same shape as ``lats``.

    Returns:
        Tuple ``(x, y)`` of easting and northing in metres. For pandas
        Series input the results are Series with the same index.
    """
    r_major = 6378137.000  # sphere radius in metres used by the projection
    x = r_major * np.radians(lons)
    # The previous version derived a per-point scale as ``x / lons``. That
    # factor is just the constant ``r_major * pi / 180``, but computing it by
    # division yields 0/0 = NaN for any point at longitude 0. Using the closed
    # form keeps every point finite.
    y = r_major * np.log(np.tan(np.pi / 4.0 + np.radians(lats) / 2.0))
    return (x, y)
# Add projected coordinates to the stops table so the glyphs line up with the
# mercator axes and the OSM tile layer of the figure below.
stops_df['merc_x'], stops_df['merc_y'] = merc_from_arrays(stops_df['stop_lat'], stops_df['stop_lon'])

# Interactive map: pan / wheel-zoom only, hover shows name, platform and the
# original lat/lon. The x/y ranges are given in projected coordinates
# (presumably the bounding box of the VBB area).
p = figure(
    title="Public Transport Stops of VBB",
    plot_width=800,
    plot_height=700,
    tools="pan,wheel_zoom",
    x_axis_type="mercator",
    y_axis_type="mercator",
    x_range=(1215654.4978, 1721973.3732),
    y_range=(6533225.6816, 7296372.9720),
    tooltips=[
        ("Name", "@stop_name"),
        ("platform", "@platform_code"),
        ("(Lat, Lon)", "(@stop_lat, @stop_lon)"),
    ],
)
# OpenStreetMap basemap underneath the stop markers.
p.add_tile(get_provider(OSM))
# One circle per (deduplicated) stop, read from the DataFrame columns.
p.circle(source=stops_df, x='merc_x', y='merc_y')
show(p)
# Render the HoloViews objects of this cell with the bokeh backend.
hv.output(backend="bokeh")
# Half-transparent OpenStreetMap tile layer, reused as basemap for both panels.
tiles = hv.element.tiles.OSM().opts(alpha=0.5)
# All stops in stops_df, positioned by the merc_x / merc_y columns.
stops = hv.Points(stops_df, ['merc_x', 'merc_y'], label='Public Transport Stops')
# Only the stops whose wheelchair_boarding value equals 1.
stops_wa = hv.Points(stops_df.loc[stops_df['wheelchair_boarding'] == 1], ['merc_x', 'merc_y'], label='Wheelchair accessible Stops')
# `*` overlays the datashaded points on the tiles; `+` places the two overlays
# side by side. As the cell's last expression, the resulting layout is displayed.
tiles * hd.datashade(stops) + tiles * hd.datashade(stops_wa)
Stations with most stops¶
# Ten most frequent stop names. Uses the copy taken before drop_duplicates(),
# so every stop record sharing a name is counted.
stops_df_multiple_stops['stop_name'].value_counts().head(10)
S Potsdam Hauptbahnhof 23
S Wannsee Bhf (Berlin) 19
Potsdam, Medienstadt Babelsberg Bhf 19
Cottbus, Hauptbahnhof 19
Fürstenwalde, Bahnhof 17
Potsdam, Johannes-Kepler-Platz 17
S+U Zoologischer Garten Bhf (Berlin) 17
S Schöneweide/Sterndamm (Berlin) 16
S+U Berlin Hauptbahnhof 16
Frankfurt (Oder), Bahnhof 16
Name: stop_name, dtype: int64
# Count the stop records behind each station name, keep only names that occur
# more than once, and order them from most to fewest records.
num_stops = (
    stops_df_multiple_stops
    .groupby(['stop_name'])['stop_id']
    .count()
    .to_frame('num_stops')
    .loc[lambda counts: counts['num_stops'] > 1]
    .sort_values('num_stops', ascending=False)
)
# Summary statistics of the per-name record counts.
num_stops.describe()
| num_stops | |
|---|---|
| count | 13087.000000 |
| mean | 3.186445 |
| std | 1.292919 |
| min | 2.000000 |
| 25% | 3.000000 |
| 50% | 3.000000 |
| 75% | 3.000000 |
| max | 23.000000 |
# Mean and median of the per-name stop counts; drawn as reference lines below.
# num_stops was filtered to names with more than one stop record, so
# single-record names are not part of these statistics or of the histogram.
num_stops_mean = num_stops['num_stops'].mean()
num_stops_median = num_stops['num_stops'].median()
fig, ax = plt.subplots()
# discrete=True: treat num_stops as integer-valued, with bars centred on each value.
sns.histplot(x='num_stops', data=num_stops, color=sns_c[3], ax=ax, discrete=True)
# NOTE(review): the format spec ' ,.2f' starts with the space sign flag, so the
# label gets an extra leading space for positive values ("mean =  3.19").
ax.axvline(x=num_stops_mean, color=sns_c[1], linestyle='--', label=f'mean = {num_stops_mean: ,.2f}')
ax.axvline(x=num_stops_median, color=sns_c[4], linestyle='--',label=f'median = {num_stops_median}')
ax.legend(loc='upper right')
# xlim=(0, None): start the x axis at 0 and leave the upper limit automatic.
# This is the cell's last expression, so its return value is shown as output.
ax.set(title='Number of Stops per Location', xlabel='number of stops', xlim=(0, None))
[Text(0.5, 1.0, 'Number of Stops per Location'),
Text(0.5, 0, 'number of stops'),
(0.0, 24.6)]